June Python User Group Lightning Talk - 3 bug bounties in 3 minutes
tinygrad invert_dict error
Compare 3 implementations: 1. the original, with reversed
items; 2. invert_dict_norev and dedup_dict_norev, without reversed
items; 3. invert_dict_sorted and dedupe_dict_sorted, with sorted items.
dedupe_dict
>>> def invert_dict(d):
... return {v: k for k, v in reversed(d.items())}
>>> def dedup_dict(d):
... return invert_dict(invert_dict(d))
>>> d = dict((i, i) for i in range(1_000_000))
>>> %timeit dedup_dict(d)
159 ms ± 1.67 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
>>> d = dict((i, 1_000_000 - i) for i in range(1_000_000))
>>> %timeit dedup_dict(d)
154 ms ± 879 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
dedupe_dict_norev
>>> def invert_dict(d):
... return {v: k for k, v in d.items()}
>>> def dedup_dict(d):
... return invert_dict(invert_dict(d))
dedupe_dict_sorted
>>> def invert_dict(d): return {v: k for k, v in sorted(d.items())}
>>> def dedup_dict(d): return invert_dict(invert_dict(d))
Compare 7 sort orders for the initial dicts: 1. sorted keys and values, 2. antisorted values, 3. antisorted keys, 4. antisorted keys and values, 5. random keys, 6. random values, 7. random keys and values; 2 datatypes: int and float; and 3 duplicate percentages: 0%, 1%, 10%.
pd.DataFrame():
for keysort, valsort, dtype, dup_frac in product([sorted, sorted, reversed, reversed], [sorted, reversed, sorted, reversed], [int, float], [0, .01, .10]):
random float keys and values 6.
...: def invert_dict(d): return {v: k for k, v in reversed(d.items())}
...: def dedup_dict(d): return invert_dict(invert_dict(d))
In [83]: >>> d = dict((i, i) for i in range(1_000_000))
...: >>> %timeit dedup_dict(d)
159 ms ± 1.67 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [84]: >>> d = dict((i, 1_000_000 - i) for i in range(1_000_000))
...: >>> %timeit dedup_dict(d)
154 ms ± 879 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
>>> d = dict((i, (1_000_000 - i) % 10) for i in range(1_000_000))
>>> %timeit dedup_dict(d)
>>> d = dict((i, (1_000_000 - i) % 10) for i in range(1_000_000))
>>> d = dict((i, i % 10) for i in range(1_000_000))
>>> %timeit dedup_dict(d)
>>> d = dict((i, i % 10) for i in range(1_000_000))
In [64]:
...: def invert_dict(d): return {v: k for k, v in reversed(d.items())}
...: def dedup_dict(d): return invert_dict(invert_dict(d))
In [65]: d = dict((i, i % 10) for i in range(1_000_000))
In [66]: dedup_dict(d)
Out[66]: {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
In [67]: %timeit dedup_dict(d)
45.5 ms ± 729 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [68]: d = dict((i, i % 10) for i in reversed(range(1_000_000)))
In [69]: %timeit dedup_dict(d)
45.1 ms ± 749 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [70]: d = dict((i, (1_000_000 - i) % 10) for i in range(1_000_000))
In [71]: %timeit dedup_dict(d)
45.2 ms ± 473 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [72]:
...: def invert_dict(d): return {v: k for k, v in d.items()}
...: def dedup_dict(d): return invert_dict(invert_dict(d))
In [73]: %timeit dedup_dict(d)
45.4 ms ± 1.28 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [74]: d = dict((i, i % 10) for i in reversed(range(1_000_000)))
In [75]: %timeit dedup_dict(d)
44.4 ms ± 237 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
def invert_dict(d):
    """Return a new dict that maps each value of ``d`` back to a key.

    The items of ``d`` are visited in reverse insertion order and written
    one by one, so a later write overwrites an earlier one. When several
    keys share the same value, the key that was inserted into ``d`` first
    is therefore the one that survives. The values of ``d`` must be
    hashable because they become the keys of the result.
    """
    inverted = {}
    # Walk the items back-to-front; the last assignment for a value wins.
    for key, value in reversed(d.items()):
        inverted[value] = key
    return inverted
def dedup_dict(d):
    """Return ``d`` with duplicate values removed by inverting it twice.

    The first inversion maps each distinct value to a single key, which
    collapses entries that share a value; the second inversion turns that
    mapping back into key -> value form. Which key is kept for a repeated
    value is decided by ``invert_dict``.
    """
    value_to_key = invert_dict(d)
    return invert_dict(value_to_key)
>>> d = dict(zip('abcd', range(4)))
>>> d
{'a': 0, 'b': 1, 'c': 2, 'd': 3}
>>> dict(reversed(d.items()))
{'d': 3, 'c': 2, 'b': 1, 'a': 0}
>>> {v, k for k, v in reversed(d.items())}
>>> {v, k for (k, v) in reversed(d.items())}
>>> {(v, k) for k, v in reversed(d.items())}
{(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd')}
>>> {v: k for k, v in reversed(d.items())}
{3: 'd', 2: 'c', 1: 'b', 0: 'a'}
>>> dict(v: k for k, v in reversed(d.items())])
>>> dict((v, k) for k, v in reversed(d.items()))
{3: 'd', 2: 'c', 1: 'b', 0: 'a'}
>>> {v: k for k, v in d.items()}
{0: 'a', 1: 'b', 2: 'c', 3: 'd'}
>>> dict((v, k) for k, v in d.items())
{0: 'a', 1: 'b', 2: 'c', 3: 'd'}
dict(reversed(d.items()))
The function `invert_dict` uses the `reversed` builtin unnecessarily. Like `sorted`, `reversed` merely changes the order in which the items are visited; it does not add or remove any items.
>>> reversed(zip('abcd','4321'))
>>> d = dict(zip('abcd','4321'))
>>> d
{'a': '4', 'b': '3', 'c': '2', 'd': '1'}
>>> reversed(d.items())
<dict_reverseitemiterator at 0x7fb3f1d6c7c0>
>>> dict(reversed(d.items()))
{'d': '1', 'c': '2', 'b': '3', 'a': '4'}
>>> list(reversed(d.items()))
[('d', '1'), ('c', '2'), ('b', '3'), ('a', '4')]